libname Input "Input Data";
libname Output "Output Data";

proc import datafile="Input Data\BoardEx_ListingMatch.csv"
			dbms=dlm
			out=BX_ListingMatch
			replace;
		delimiter=',';
		getnames=yes;
		guessingrows = 41000;
run;

proc sort data = Input.BoardEx_Emp out = Director (keep = DirectorID CompanyID OrgType HOCountryName DateStartRole DateEndRole RoleName); by CompanyID RoleName; run;
proc import datafile="Input Data\Open\Agency_BX_Map.csv"
			dbms=dlm
			out=Agencies
			replace;
		delimiter=';';
		getnames=yes;
		guessingrows = 700;
run;
proc sort data = Agencies (keep = agcy_bx agencyid_contracts where = (CompanyID ~= . and AgencyID ~= .) rename = (agcy_bx = CompanyID agencyid_contracts = AgencyID)) nodupkey;
	by CompanyID AgencyID;
run;
proc sort data = Input.BoardEx_Comp out = Company (keep = BoardID CIKCODE HOADDRESS4 rename = (BoardID = CompanyID HOADDRESS4 = State_Long)) nodupkey; by BoardID; run;

data Company;
	set Company;	
	if State_Long = "Alabama" then State = "AL";
	if State_Long = "Alabama AL" then State = "AL";
	if State_Long = "Alaska AK" then State = "AK";
	if State_Long = "Arizona AZ" then State = "AZ";
	if State_Long = "Arkansas AR" then State = "AR";
	if State_Long = "California CA" then State = "CA";
	if State_Long = "Colorado CO" then State = "CO";
	if State_Long = "Connecticut CT" then State = "CT";
	if State_Long = "Delaware DE" then State = "DE";
	if State_Long = "District Of Columbia DC" then State = "DC";
	if State_Long = "Florida FL" then State = "FL";
	if State_Long = "Georgia GA" then State = "GA";
	if State_Long = "Hawaii HI" then State = "HI";
	if State_Long = "Idaho ID" then State = "ID";
	if State_Long = "Illinois IL" then State = "IL";
	if State_Long = "Indiana IN" then State = "IN";
	if State_Long = "Iowa IA" then State = "IA";
	if State_Long = "Kansas KS" then State = "KS";
	if State_Long = "Kentucky KY" then State = "KY";
	if State_Long = "Louisiana LA" then State = "LA";
	if State_Long = "Maine ME" then State = "ME";
	if State_Long = "Maryland MD" then State = "MD";
	if State_Long = "Massachusetts MA" then State = "MA";
	if State_Long = "Michigan MI" then State = "MI";
	if State_Long = "Minnesota MN" then State = "MN";
	if State_Long = "Mississippi MS" then State = "MS";
	if State_Long = "Missouri MO" then State = "MO";
	if State_Long = "Montana MT" then State = "MT";
	if State_Long = "Nebraska NE" then State = "NE";
	if State_Long = "Nevada NV" then State = "NV";
	if State_Long = "New Hampshire NH" then State = "NH";
	if State_Long = "New Jersey NJ" then State = "NJ";
	if State_Long = "New Mexico NM" then State = "NM";
	if State_Long = "New York NY" then State = "NY";
	if State_Long = "North Carolina NC" then State = "NC";
	if State_Long = "North Dakota ND" then State = "ND";
	if State_Long = "Ohio OH" then State = "OH";
	if State_Long = "Oklahoma OK" then State = "OK";
	if State_Long = "Oregon OR" then State = "OR";
	if State_Long = "Pennsylvania PA" then State = "PA";
	if State_Long = "Rhode Island RI" then State = "RI";
	if State_Long = "South Carolina SC" then State = "SC";
	if State_Long = "South Dakota SD" then State = "SD";
	if State_Long = "Tennessee TN" then State = "TN";
	if State_Long = "Texas TX" then State = "TX";
	if State_Long = "Utah" then State = "UT";
	if State_Long = "Utah UT" then State = "UT";
	if State_Long = "Vermont VT" then State = "VT";
	if State_Long = "Virginia VA" then State = "VA";
	if State_Long = "Washington WA" then State = "WA";
	if State_Long = "West Virginia WV" then State = "WV";
	if State_Long = "Wisconsin WI" then State = "WI";
	if State_Long = "Wyoming WY" then State = "WY";
	drop State_Long;
run;

proc import datafile="Input Data\Open\Plum_Book_2016.csv"
			dbms=dlm
			out=Plum_2016
			replace;
		delimiter=';';
		getnames=yes;
		guessingrows = 9000;
run;
proc import datafile="Input Data\Open\BoardEx_PlumBook_Agencies.csv"
			dbms=dlm
			out=PB_Agencies
			replace;
		delimiter=';';
		getnames=yes;
		guessingrows = 2000;
run;

/* Select set of "powerful" employees */
data Plum_2016;
	set Plum_2016 (keep = Agcy_Name Org_Name Title Type_of_Appt Pay_Plan Pay);
	if Type_of_Appt ~= "PAS" and Type_of_Appt ~= "PA" then delete;
run;

proc sql;
	create table Plum as
		select * from Plum_2016 a, PB_Agencies (where = (CompanyID ~= .)) b
			where a.Agcy_Name = b.Agcy_Name and a.Org_Name = b.Org_Name;
quit;

proc sort data = Plum nodupkey; by CompanyID Title; run;

data Director;
	merge Plum (in = b drop = Agcy_Name Org_Name Pay_Plan Pay rename = (Title = RoleName)) Director (in = a);
	by CompanyID RoleName;
	if a = 1;
	TopRole = 0;
	if b = 1 then TopRole = 1;
	ContractRole = 1;
	if find(rolename,"Contract") = 0 and find(rolename,"contract") = 0
		and find(rolename,"Procurement") = 0 and find(rolename,"procurement") = 0
		and find(rolename,"Acquisition") = 0 and find(rolename,"acquisition") = 0
		and find(rolename,"Pric") = 0 and find(rolename,"pric") = 0 then ContractRole = 0;
	if find(rolename,"Contractor") ~= 0 then ContractRole = 0;
	if DateStartRole = .N then delete;
	if DateEndRole = .N then delete;
	if DateEndRole = .C then DateEndRole = mdy(12,31,2019);
	if DateEndRole < DateStartRole then delete;

	/* Dropping director-companies with zero tenure */
	if CompanyID = 33491 and DirectorID = 1879228 then delete;
	if CompanyID = 127647 and DirectorID = 1002662 then delete;
run;

proc sort data = BX_ListingMatch; by CompanyID; run;

data Director;
	merge Director (in = a) BX_ListingMatch (in = b);
	by CompanyID;
	if a = 1;
	if b = 1 then CompanyID = CompanyID_New;
	drop CompanyID_New;
run;

data Director_Roles;
	set Director;
	keep RoleName CompanyID DirectorID DateStartRole DateEndRole;
run;

proc sort data = Director; by DirectorID CompanyID DateStartRole; run;

data Director;
	keep DirectorID CompanyID DateStartRole_new DateEndRole HOCOUNTRYNAME ORGTYPE TopRole ContractRole RoleName;
	rename DateStartRole_new = DateStartRole; 
	set Director;
	by DirectorID CompanyID;
	retain DateStartRole_new DateEndRole_last;
	format DateStartRole_new YYMMDDN8.;
	if first.CompanyID then do;
		DateStartRole_new = DateStartRole;
		DateEndRole_last = .;
		TopRole_new = TopRole;
		ContractRole_new = ContractRole;
	end;
	else do;
		if year(DateStartRole) - year(DateEndRole_last) > 1 then DateStartRole_new = DateStartRole;
		TopRole_new = TopRole_new + TopRole;
		ContractRole_new = ContractRole_new + ContractRole;
	end;
	DateEndRole_last = DateEndRole;
	if TopRole_new > 0 then TopRole = 1;
	if ContractRole_new > 0 then ContractRole = 1;
run;

proc sort data = Director; by DirectorID CompanyID DateStartRole descending DateEndRole; run;
proc sort data = Director nodupkey; by DirectorID CompanyID DateStartRole; run;

data Director;
	set Director;
	by DirectorID CompanyID;
	at = DateEndRole - DateStartRole;
	retain Tenure;
	if first.CompanyID then do;
		Tenure = at;
	end;
	else do;
		Tenure = Tenure + at;
	end;
	drop at;
run;

proc sql;
	create table Director_Company1 as
		select * from Director a, Agencies b
			where a.CompanyID = b.CompanyID;
quit;

data Director_Company1;
	set Director_Company1;
	Connect = 1;
	Connect_Top = 0;
	if TopRole = 1 then Connect_Top = 1;
	Connect_Contract = 0;
	if ContractRole = 1 then Connect_Contract = 1;
run;

proc sort data = Director; by CompanyID; run;

data Director_Company2;
	merge Director (in = a) Agencies (in = b);
	by CompanyID;
	if b ~= 1;
	Connect = 0;
	Connect_Top = 0;
	Connect_Contract = 0;
run;

proc append base = Director_Company1 data = Director_Company2; run;

proc sort data = Director_Company1; by CompanyID; run;
proc sort data = Company; by CompanyID; run;

data Director_Company;
	merge Director_Company1 (in = a) Company;
	by CompanyID;
	if a = 1;	
	if HOCountryName ~= "United States" then delete;
run;

data Director_Agency;
	set Director_Company;
	if Connect = 0 then delete;
	rename DateStartRole = AgencyStartDate DateEndRole = AgencyEndDate Tenure = Agency_Tenure RoleName = AgencyRole;
run;

proc sort data = Director_Company (drop = AgencyID Connect:); by DirectorID CompanyID; run;

proc sql;
	create table Director_Company_Agency as
		select * from Director_Company a, Director_Agency (keep = DirectorID AgencyEndDate AgencyStartDate Agency_Tenure AgencyID AgencyRole Connect Connect_Top Connect_Contract) b
			where a.DirectorID = b.DirectorID and a.DateEndRole > b.AgencyStartDate;
quit;

proc sort data = Director_Company_Agency; by CompanyID DirectorID DateEndRole DateStartRole AgencyID descending AgencyEndDate; run;
proc sort data = Director_Company_Agency nodupkey; by CompanyID DirectorID DateEndRole DateStartRole AgencyID; run;

data Year_List;
	do Year = 1990 to 2019;
		output;
	end;
run;

proc import datafile="Input Data\BoardEx_Coverage_20220520.csv"
			dbms=dlm
			out=BX_Coverage
			replace;
		delimiter=';';
		getnames=yes;
		guessingrows = 41000;
run;

proc sort data = BX_Coverage; by Company_ID; run;
proc sort data = BX_ListingMatch; by CompanyID; run;

data BX_Coverage;
	merge BX_Coverage (in = a rename = (Company_ID = CompanyID)) BX_ListingMatch (in = b);
	by CompanyID;
	if a = 1;
	if First_AR_Date ~= "Current" and First_AR_Date ~= "#N/A" then First_Year = input(First_AR_Date, 8.) - 21916;
	if First_AR_Date = "Current" then First_Year = mdy(05,20,2022);
	format First_Year mmddyy10.;
	if b = 1 then CompanyID = CompanyID_New;
run;

proc sort data = BX_Coverage; by CompanyID; run;

proc means data = BX_Coverage noprint;
	by CompanyID;
	var First_Year;
	output out = BX_Coverage min = First_Year;
run;

proc sql;
	create table Director_Company_Agency_Year as
		select * from Director_Company_Agency a, Year_List b, BX_Coverage (keep = CompanyID First_Year) c
			where year(a.DateStartRole) <= b.Year and year(a.DateEndRole) >= b.Year and a.CompanyID = c.CompanyID and b.Year >= year(c.First_Year+1);
quit;

proc sort data = Director_Company_Agency_Year; by CompanyID DirectorID HOCOUNTRYNAME ORGTYPE CIKCODE State Agency: Connect: Year First_Year; run;
proc means data = Director_Company_Agency_Year noprint;
	by CompanyID DirectorID HOCOUNTRYNAME ORGTYPE CIKCODE State Agency: Connect: Year First_Year;
	var Tenure DateEndRole;
	output out = Director_Company_Agency_Year_U min(DateStartRole) = DateStartRole max = Tenure DateEndRole;
run;

data Director_Company_Agency_Year_U;
	set Director_Company_Agency_Year_U;
	
	Company_Years = (DateEndRole - DateStartRole)/365;
	Cool_Off = (DateStartRole - AgencyEndDate)/365;
	Agency_Years = (AgencyEndDate - AgencyStartDate)/365;

	if year(AgencyStartDate) > Year then Connect = 0;
	if year(AgencyStartDate) > Year then Connect_Top = 0;
	if year(AgencyStartDate) > Year then Connect_Contract = 0;
	if Cool_Off < 0 then Cool_Off = 0;

	if DateStartRole - AgencyStartDate < 0 then Connect = 0;
	if DateStartRole - AgencyStartDate < 0 then Connect_Top = 0;
	if DateStartRole - AgencyStartDate < 0 then Connect_Contract = 0;
	
	/* SET CUTOFF FOR COOL OFF PERIOD */
	Connect_DW = Connect;
	if Cool_Off <= 2 then Connect_DW = 0;
	if Cool_Off > 2 then Connect = 0;
	if Cool_Off > 2 then Connect_Top = 0;
	if Cool_Off > 2 then Connect_Contract = 0;

	Appointment = 0;
	if year(DateStartRole) = Year and Connect = 1 then Appointment = 1;
	Appointment_DW = 0;
	if year(DateStartRole) = Year and Connect_DW = 1 then Appointment_DW = 1;
	Appointment_Top = 0;
	if year(DateStartRole) = Year and Connect_Top = 1 then Appointment_Top = 1;
	Appointment_Contract = 0;
	if year(DateStartRole) = Year and Connect_Contract = 1 then Appointment_Contract = 1;
	
	Departure = 0;
	if year(DateEndRole) = Year and Connect = 1 then Departure = 1;
	Departure_DW = 0;
	if year(DateEndRole) = Year and Connect_DW = 1 then Departure_DW = 1;
	Departure_Top = 0;
	if year(DateEndRole) = Year and Connect_Top = 1 then Departure_Top = 1;
	Departure_Contract = 0;
	if year(DateEndRole) = Year and Connect_Contract = 1 then Departure_Contract = 1;
run;

proc sort data = Director_Company_Agency_Year_U; by CompanyID AgencyID Year DirectorID; run;

proc means data = Director_Company_Agency_Year_U noprint;
	by CompanyID AgencyID Year;
	var Connect Connect_DW Connect_Top Connect_Contract;
	output out = Company_Agency_Year sum = Connect_Num Connect_Num_DW Connect_Num_Top Connect_Num_Contract
									max(Appointment Appointment_DW Appointment_Top Appointment_Contract) = Appointment Appointment_DW Appointment_Top Appointment_Contract
									max(Departure Departure_DW Departure_Top Departure_Contract) = Departure Departure_DW Departure_Top Departure_Contract;
run;






proc sql;
	create table Director_Company_Year as
		select * from Director_Company a, Year_List b, BX_Coverage (keep = CompanyID First_Year) c
			where year(a.DateStartRole) <= b.Year and year(a.DateEndRole) >= b.Year and a.CompanyID = c.CompanyID and b.Year >= year(c.First_Year+1);
quit;

proc sort data = Director_Company_Year; by CompanyID DirectorID HOCOUNTRYNAME ORGTYPE CIKCODE State Year First_Year; run;
proc means data = Director_Company_Year noprint;
	by CompanyID DirectorID HOCOUNTRYNAME ORGTYPE CIKCODE State Year First_Year;
	var Tenure DateEndRole;
	output out = Director_Company_Year_U min(DateStartRole) = DateStartRole max = Tenure DateEndRole;
run;

data Director_Company_Year_U;
	set Director_Company_Year_U;
	if ORGTYPE ~= "Private" and ORGTYPE ~= "Quoted" and ORGTYPE ~= "Partnership" then delete;
	Public = 0;
	if ORGTYPE = "Quoted" then Public = 1;
	drop ORGTYPE;
run;

proc sort data = Director_Company_Year_U; by CompanyID State Year; run;

proc means data = Director_Company_Year_U noprint;
	by CompanyID State Year;
	var DirectorID;
	output out = Company_Year n = n_People min(Public) = Public;
run;

proc sort data = Agencies out = Agencies_RD nodupkey; by AgencyID; run;

proc sql;
	create table Company_Agency_Year_Full as
		select * from Company_Year (drop = _:) a, Agencies_RD (keep = AgencyID) b;
quit;

proc sort data = Company_Agency_Year; by CompanyID AgencyID Year; run;
proc sort data = Company_Agency_Year_Full; by CompanyID AgencyID Year; run;

data Company_Agency_Year_Full;
	merge Company_Agency_Year (drop = _:) Company_Agency_Year_Full (in = b);
	by CompanyID AgencyID Year;
	if b = 1;
	if Connect_Num = . then Connect_Num = 0;
	if Connect_Num_DW = . then Connect_Num_DW = 0;
	if Connect_Num_Top = . then Connect_Num_Top = 0;
	if Connect_Num_Contract = . then Connect_Num_Contract = 0;
	if Appointment = . then Appointment = 0;
	if Appointment_DW = . then Appointment_DW = 0;
	if Appointment_Top = . then Appointment_Top = 0;
	if Appointment_Contract = . then Appointment_Contract = 0;
	if Departure = . then Departure = 0;
	if Departure_DW = . then Departure_DW = 0;
	if Departure_Top = . then Departure_Top = 0;
	if Departure_Contract = . then Departure_Contract = 0;
run;

proc import datafile="Input Data\BX_CIQ_Merge_NAICS5.csv"
			dbms=dlm
			out=NAICS1
			replace;
		delimiter=',';
		getnames=yes;
		guessingrows = 100000;
run;
proc import datafile="Input Data\CIQ_NAICS.csv"
			dbms=dlm
			out=NAICS2
			replace;
		delimiter=',';
		getnames=yes;
		guessingrows = 100000;
run;

proc sort data = NAICS1; by CompanyID; run;
proc sort data = NAICS2; by CompanyID; run;

data NAICS;
	merge NAICS1 (in = a) NAICS2 (in = b keep = NAICS_Code CompanyID);
	by CompanyID;
	if a = 1 and b = 1;
	NAICS = NAICS_Code;
	if floor(NAICS_Code/100000) = 0 then NAICS = NAICS_Code*10;
	if floor(NAICS_Code/10000) = 0 then NAICS = NAICS_Code*100;
	if floor(NAICS_Code/1000) = 0 then NAICS = NAICS_Code*1000;
	if floor(NAICS_Code/100) = 0 then NAICS = NAICS_Code*10000;
	if floor(NAICS_Code/10) = 0 then NAICS = NAICS_Code*100000;
	keep BoardID NAICS;
run;

proc sort data = NAICS (rename = (BoardID = CompanyID)) nodupkey; by CompanyID; run;
proc sort data = Company_Agency_Year_Full; by CompanyID Year; run;

data CAY;
	merge Company_Agency_Year_Full (in = a) NAICS (keep = CompanyID NAICS in = b);
	by CompanyID;
	if a = 1 and b = 1;
run;













/* Merge in Unique_Contracts */

proc sort data = Output.Contracts_Year; by CompanyID AgencyID Year; run;
proc sort data = CAY; by CompanyID AgencyID Year; run;

data Contracts_Analysis;
	merge CAY (in = a) Output.Contracts_Year (drop = _:);
	by CompanyID AgencyID Year;
	if a = 1;
	if n_Contracts = . then n_Contracts = 0;
	if Fixed_Price = . then Fixed_Price = 0;
	if Cost_Plus = . then Cost_Plus = 0;
	if Incentives = . then Incentives = 0;
run;












/* Import RegData by industry */

proc import datafile="Input Data\6-digit.csv"
			dbms=dlm
			out=RegData_Ind6
			replace;
		delimiter=',';
		getnames=yes;
run;

proc import datafile="Input Data\5-digit.csv"
			dbms=dlm
			out=RegData_Ind5
			replace;
		delimiter=',';
		getnames=yes;
run;

data RegData_Ind5;
	set RegData_Ind5;
	label = label*10;
	probability2 = probability;
	drop probability;
run;

proc sort data = RegData_Ind6; by Year Title Part Label; run;
proc sort data = RegData_Ind5; by Year Title Part Label; run;

data RegData_Ind;
	merge RegData_Ind6 RegData_Ind5;
	by Year Title Part Label;
	if probability = . then probability = probability2;
	drop probability2;
run;





/* Import RegData stats */

proc import datafile="Input Data\restrictions.csv"
			dbms=dlm
			out=Restrictions
			replace;
		delimiter=',';
		getnames=yes;
run;

proc sort data = Restrictions out = RegData_Stats (keep = Year Title Part Restrictions); by Year Title Part; run;









/* Import RegData by agencies */

proc import datafile="Input Data\metadata.csv"
			dbms=dlm
			out=Agency_Links
			replace;
		delimiter=',';
		getnames=yes;
		guessingrows = 345000;
run;

proc import datafile="Input Data\Open\Agency_BX_Map.csv"
			dbms=dlm
			out=Agencies
			replace;
		delimiter=';';
		getnames=yes;
		guessingrows = 700;
run;

proc sort data = Agencies (keep = department_id agency_id agencyid_contracts where = (agencyid_contracts ~= . and department_id ~= .)) nodupkey;
	by department_id agency_id agencyid_contracts;
run;

proc sql;
	create table RegData_Agency as
		select * from Agency_Links a, Agencies b
			where a.Agency_ID = b.agency_id and a.Department_ID = b.department_id;
quit;

data RegData_Agency;
	set RegData_Agency;
	AgencyID = agencyid_contracts;
	if Agency_ID = 999999 then delete;
	drop Agency Agency_ID Department Department_ID Key Document_ID agencyid_contracts;
run;






/* Merge everything together */

proc sql;
	create table RegData as
		select * from RegData_Ind a, RegData_Agency b, RegData_Stats c
			where a.Year = b.Year and b.Year = c.Year and a.Title = b.Title and b.Title = c.Title and a.Part = b.Part and b.Part = c.Part;
quit;

data RegData;
	set RegData;
	Restrictions_Eff = Restrictions*Probability;
run;

proc sort data = RegData (rename = (Label = NAICS)); by AgencyID Year NAICS; run;

proc means data = RegData noprint;
	by AgencyID Year NAICS;
	var Restrictions_Eff;
	output out = RegData_Unique sum = Restrictions;
run;

proc sort data = RegData_Unique out = Output.RegData_Run_Contracts; by NAICS AgencyID Year; run;






/* Final merge */

proc sort data = Contracts_Analysis; by NAICS AgencyID Year; run;
proc sort data = Output.RegData_Run_Contracts; by NAICS AgencyID Year; run;

data Output.Contracts_Analysis;
	merge Contracts_Analysis (in = a) Output.RegData_Run_Contracts (in = b);
	by NAICS AgencyID Year;
	if a = 1;
run;


proc export data = Output.Contracts_Analysis outfile = "Output Data\contracts_analysis.dta"
	dbms = dta replace;
run;






* TO CALCULATE MOST COMMON AGENCY ROLE TITLES;

proc sort data = Director_Company_Agency_Year_U out = Roles nodupkey; by CompanyID DirectorID AgencyRole; run;

proc sort data = Roles; by AgencyRole; run;
proc means data = Roles (where = (Connect_Top = 1)) noprint;
	by AgencyRole;
	var Connect_Top;
	output out = Roles2 (drop = _:) sum = n_connect n = n_FR;
run;
proc means data = Roles (where = (Connect_Top = 1)) noprint;
	var Connect_Top;
	output out = Roles3 (drop = _:) sum = total;
run;

data Roles2;
	set Roles2;
	per = n_connect/305;
run;

proc sort data = Roles2; by descending n_FR; run;
